Main Observation
Cost of the operation of the hubs is more of a factor in changing costs than shipping. [As modeled.]
In this fictional model, SCS will buy and sell USED vehicles throughout the U.S. Well, where will the vehicles be coming from and going to? Where the people are! Let’s get US population data by county or city.
There are many databases and methods of showing population and/or population density. With much credit here: https://homepage.divms.uiowa.edu/~luke/classes/STAT4580/maps.html#county-population-data, we can show population data by county.
# Get county population data
# Get file if file hasn't been downloaded already
if (! file.exists("PEP_2018_PEPANNRES.zip")) {
download.file("http://www.stat.uiowa.edu/~luke/data/PEP_2018_PEPANNRES.zip",
"PEP_2018_PEPANNRES.zip")
unzip("PEP_2018_PEPANNRES.zip")
}
pep2018 <- read.csv("PEP_2018_PEPANNRES_with_ann.csv")
pepvars <- names(pep2018)
pep2018 <- read.csv("PEP_2018_PEPANNRES_with_ann.csv", stringsAsFactors = FALSE,
head = FALSE, skip = 2)
names(pep2018) <- pepvars
# Working with the county names can be tricky:
# Show issues with data
#head(filter(pep2018, grepl(", Iowa", GEO.display.label)))
#pep2018[1803,]
#filter(pep2018, GEO.id2 == 19141)
#filter(pep2018, GEO.id2 == 22001)
# For US counties it is safer to work with the FIPS county code, which is the GEO.id2 variable.
# The county.fips data frame in the maps package links the FIPS code to region names used by the map data in the maps package.
library(maps)
#head(county.fips)
# Basic Map Data
# Map data from the map function in package maps consists of the x and y coordinates of polygons and names for the regions.
usa <- map("state", fill = TRUE, plot = FALSE)
library(ggplot2)
gusa <- map_data("state")
p <- ggplot(gusa) + geom_polygon(aes(long, lat, group = group), color = "white")
#p
# Approximate Centroids
# A quick approximation to the centroids (centers of gravity) of the polygons is to compute the center of the bounding rectangle.
# This is easiest to do with the data from map_data.
library(dplyr)
state_centroids <- summarize(group_by(gusa, region),
x = mean(range(long)), y = mean(range(lat)))
names(state_centroids)[1] <- "state"
#head(state_centroids)
# Symbol Plots of State Population
# Aggregate the county populations to the state level:
state_pops <- mutate(pep2018,
state = tolower(sub(".*, ", "", GEO.display.label)),
pop = respop72018)
#unique(state_pops$state)
state_pops <- summarize(group_by(state_pops, state),
pop = sum(pop, na.rm = TRUE))
# Using tolower matches the case in the state_centroids table.
# An alternative would be to use the state FIPS code and the state.fips table.
# Merge in the centroid locations. Using inner_join drops cases not included in the lower-48 map data.
state_pops <- inner_join(state_pops, state_centroids, "state")
#head(state_pops)
# Choropleth Maps of State Population
# A choropleth map needs to have the information for coloring all the pieces of a region.
#
# For ggplot this can be done by merging:
sp <- select(state_pops, region = state, pop)
gusa_pop <- left_join(gusa, sp, "region")
#head(gusa_pop)
## long lat group order region subregion pop
#A first attempt:
# ggplot(gusa_pop) +
# geom_polygon(aes(long, lat, group = group, fill = pop)) +
# coord_map("bonne", parameters=45) +
# ggthemes::theme_map()
# This image is dominated by the fact that most state populations are small.
# Showing population ranks, or percentile values, can help see the variation a bit better.
spr <- mutate(sp, rpop = rank(pop))
gusa_rpop <- left_join(gusa, spr, "region")
# ggplot(gusa_rpop) +
# geom_polygon(aes(long, lat, group = group, fill = rpop)) +
# coord_map("bonne", parameters=45) +
# ggthemes::theme_map()
# Using quintile bins instead of a continuous scale:
ncls <- 6
spr <- mutate(spr,
pcls = cut(pop, quantile(pop, seq(0, 1, len = ncls)),
include.lowest = TRUE))
gusa_rpop <- left_join(gusa, spr, "region")
# ggplot(gusa_rpop) +
# geom_polygon(aes(long, lat, group = group, fill = pcls),
# color = "grey") +
# coord_map("bonne", parameters=45) +
# ggthemes::theme_map() +
# scale_fill_brewer(palette = "Reds")
#A percentile bin map using the map function requires a vector of colors for the regions:
usa_states <- sub(":.*", "", usa$names)
usa_pcls <- spr$pcls[match(usa_states, spr$region)]
pal <- RColorBrewer::brewer.pal(nlevels(usa_pcls), "Reds")
#map("state", fill = TRUE, col = pal[usa_pcls], border = "grey")
#This uses the match function to find indices for each polygon’s state in the regions vector.
#Another way to compute the classes for the pieces:
library(tidyr)
usa_pieces <- data.frame(names = usa$names)
usa_pieces <- separate(usa_pieces, "names", c("region", "subregion"),
sep = ":", fill = "right")
usa_pieces <- left_join(usa_pieces, spr, "region")
#map("state", fill = TRUE, col = pal[usa_pieces$pcls], border = "grey")
# Choropleth Maps of County Population
# For a county-level ggplot map, first get the polygon data frame:
library(purrr)
library(tidyr)
library(ggplot2)
gcounty <- map_data("county")
#head(gcounty)
#To attach the FIPS code we first need to clean up the county.fips table a bit:
#head(filter(county.fips, grepl(":", polyname)))
#Remove the sub-county regions, remove duplicate rows, and split the polyname variable into region and subregion:
fipstab <-
transmute(county.fips, fips, county = sub(":.*", "", polyname))
fipstab <- unique(fipstab)
fipstab <-
separate(fipstab, county, c("region", "subregion"), sep = ",")
#head(fipstab)
#Now use left_join to merge the FIPS code into gcounty:
gcounty <- left_join(gcounty, fipstab, c("region", "subregion"))
#head(gcounty)
#Pull together the data for the map:
ncls <- 6
cpop <- select(pep2018,
fips = GEO.id2,
pop10 = rescen42010,
pop18 = respop72018)
cpop <- mutate(cpop, rpop18 = rank(pop18))
cpop <- mutate(cpop,
pcls18 = cut(pop18, quantile(pop18, seq(0, 1, len = ncls)),
include.lowest = TRUE))
#head(cpop)
#Some of the counties in the polygon data base may not appear in the population data:
#unique(select(filter(gcounty, ! fips %in% cpop$fips), region, subregion))
## region subregion
## 1 south dakota shannon
#unique(select(anti_join(gcounty, cpop, "fips"), region, subregion))
## region subregion
## 1 south dakota shannon
#A left_join with cpop will give these NA values.
gcounty_pop <- left_join(gcounty, cpop, "fips")
#unique(select(filter(gcounty_pop, is.na(rpop18)), region, subregion))
## region subregion
## 1 south dakota shannon
#County level population plots using the default continuous scale:
# ggplot(gcounty_pop) +
# geom_polygon(aes(long, lat, group = group, fill = rpop18),
# color = "grey", size = 0.1) +
# geom_polygon(aes(long, lat, group = group),
# fill = NA, data = gusa, color = "lightgrey") +
# coord_map("bonne", parameters=45) + ggthemes::theme_map()
#A discrete scale with a very different color to highlight the counties with missing information:
ggplot(gcounty_pop) +
geom_polygon(aes(long, lat, group = group, fill = pcls18),
color = "grey", size = 0.1) +
geom_polygon(aes(long, lat, group = group),
fill = NA, data = gusa, color = "lightgrey") +
coord_map("bonne", parameters=45) + ggthemes::theme_map() +
scale_fill_brewer(palette = "Reds", na.value = "white") +
theme(legend.position="none") +
labs(title = "U.S. Population by County",
subtitle = "Light to Dark == Lower to Higher Population")
This is a pleasant looking map, but doesn’t help us too much for out purposes. This data was separated in to 5 different levels of population by county. It gives us false impressions of high population areas. For instance, look at Arizona and Texas. Because the counties are so big in Arizona, it looks like the entire state is deep red and has a lot higher population than it actually does whereas Texas looks moderately populated. The same with Wyoming, which gives the appearance of being heavily populated. Also, while one can get the geographic coordinates of the center of a county, that doesn’t mean that most of the population is at the center of a county. At geographic location is necessary for getting distances between cities.
Fortunately, the Internet can link us up with U.S. Census data to get estimated population by city and also by metro area.
After some access to databases and some data manipulation, we can display some city data.
# us map of states and populations
# tidyverse because it contains the all tidy packages, including ggplot2
library(tidyverse)
#library(ggplot2)
# "us.cities" in maps package contains a database is of us cities of population
# greater than about 40,000. Also included are state capitals of any
# population size.
# "state" database produces a map of the states of the United States
# mainland generated from US Department of the Census data
library(maps)
# scales to help with scaling in ggplot
library(scales)
# state data as a tibble
us_states <- as_tibble(map_data("state"))
# city data as a tibble
us_cities <- as_tibble(us.cities)
# Filter out any cities from AK or HI because we don't expect the company to do sales there anytime soon and because it makes the map look less pleasant.
us_cities <-us_cities %>%
filter(country.etc != "AK") %>%
filter(country.etc != "HI")
# Plot states and city data
ggplot(data = us_states, mapping = aes(x = long, y = lat, group = group)) +
# state outlines black and fill area white
geom_polygon(fill= "white", color = "black") +
# add cities, make them 50% transparent
geom_point(data = us_cities, aes( x = long, y = lat,
size = pop, color = "red", alpha = 0.5),
inherit.aes = FALSE) +
# remove alpha and color from legend as they aren't needed and don't look nice
guides(alpha = FALSE) +
guides(color = FALSE) +
# add commas to legend
scale_size("Population", labels = comma) +
# a pleasant ggplot theme for this
ggthemes::theme_map() +
# move legend to the bottom
theme(legend.position="bottom") +
labs(title = "U.S. Population By City",
subtitle = "For All U.S. Cities With Greater Than 40,000 People") +
# a mercator projection
coord_map("bonne", parameters=45)
The plot above gives us a better representation of population centers. Arizona now shows only a few population centers around Phoenix and Tucson. The heavily populated areas of Los Angeles/San Diego, San Francisco/Oakland, Chicago, Boston, New York, Washington D.C., Miami, etc… are clearly shown better. And look at Wyoming. Only two cities show up in this data.
In the above, AK and HI are removed. There are 1,001 cities in this data base with populations greater than 40,000. Optimization of this many sites might doable for advanced commercial optimization algorithms, but it is far too many for what we want to do and certainly too many for the Solver that comes with MS Excel.
For this fictional sales company, it is in the early growth stage.
At this stage, we will assume they have just one hub and are looking to expand. The car company started in Houston, TX and that is where their distribution center is.
For this next phase however, we won’t use individual cities. Each new hub costs a lot in new capital. Using city data might skew our results towards states with very large cities compared to a larger amount of smaller cities pack together in metro areas. If we chose the top 25 or so cities, quite a few California cities might make the cut and might leave out a city such as Washington D.C. But U.S. Census data is available for metro areas.
After some more data wrangling, we chose the top 45 areas plus a few extras, like Spokane, WA. The first six (out of 50) of our data set looks like this below:
#### import data set ####
library(readr)
library(tidyr)
#### this data set worked separately ####
#### almost all of the prep work was done in another file ####
# Read in .csv file and create Tibble DF.
cities_raw <- read_csv("my_top_50.csv")
head(cities_raw)
## # A tibble: 6 x 4
## City State `City, State` Population
## <chr> <chr> <chr> <dbl>
## 1 Albuquerque New Mexico Albuquerque, New Mexico 918018
## 2 Atlanta Georgia Atlanta, Georgia 6020364
## 3 Baltimore Maryland Baltimore, Maryland 2800053
## 4 Birmingham Alabama Birmingham, Alabama 1090435
## 5 Boise Idaho Boise, Idaho 749202
## 6 Boston Massachusetts Boston, Massachusetts 4873019
#### now map 50 metro areas ####
# us map of states and populations
# tidyverse because it contains the all tidy packages, including ggplot2
library(tidyverse)
#library(ggplot2)
# "us.cities" in maps package contains a database is of us cities of population
# greater than about 40,000. Also included are state capitals of any
# population size.
# "state" database produces a map of the states of the United States
# mainland generated from US Department of the Census data
library(maps)
# scales to help with scaling in ggplot
library(scales)
# state data as a tibble
# Read in 50 city data with distances made in "Get Distances"
cities_50 <- read_csv("distances_my_top_50.csv")
# Get states for plotting state map
us_states <- as_tibble(map_data("state"))
# plot it
ggplot(data = us_states, mapping = aes(x = long, y = lat,
group = group)) +
geom_polygon(fill= "white", color = "black") +
geom_point(data = cities_50, aes( x = lon.from, y = lat.from,
size = from_population, color = "purple", alpha = 0.5),
inherit.aes = FALSE) +
geom_text(data = cities_50, aes(x = lon.from, y = lat.from, label = from.short), size = 3, inherit.aes = FALSE) +
guides(alpha = FALSE) +
guides(color = FALSE) +
scale_size("Population", labels = comma) +
ggthemes::theme_map() +
theme(legend.position="bottom") +
labs(title = "U.S. Population By Metro Area",
subtitle = "Using U.S. Census Definition of Metro Area")+
coord_map("bonne", parameters=45)
Now we have metro area and state. Each metro area has been named by it’s prominent city. For example, the New York metro area consists of New York, NY, Newark, NJ, and the surrounding small cities. It is lableled just “New York”. This will allow us to calculate distances very easily via Google’s API serice which uses Lat/Long or City/State. We are using City/State here.
In our fictional company, they are currently buying and selling 4,000 car per month. Why 4,000? Thanks for asking! The company did $1.2B in sales in 2019. With an average selling price of $30,000 per vehicle, that is 40,000 vehicles a year and 3,333 a month. Let’s assume they are growing. That is how we get 4,000 cars a month.
We are going to start with a small problem and work towards bigger problems. We will start with the 10 largest metro areas.
For this optimization, we have 10 cities and 1 hub (Houston, TX). As mentioned earlier, the amount of buying and selling is related to the population. We model sales locations simply by the ratio of populations. If New York metro area is 19,000,000 and Los Angeles metro area is 9,500,000, NY will have twice the sales of LA.
From our 50, we filter to the largest 10. Then we get the ratio of each city to the 10-city total and multiple by 4,000. Here is our result:
#### Top 10 metros/cities were created elsewhere. Basically to 50 filtered and number of cars to ship were updated.
# Read in data.
cities_10 <- read_csv("distances_top_10.csv")
# Filter "To == Houston" as that is all we want for the first display.
# Display top 10
cities_10 %>%
select(from, to, from_num_cars) %>%
filter(to == "Houston, Texas" )
## # A tibble: 10 x 3
## from to from_num_cars
## <chr> <chr> <dbl>
## 1 New York, New York Houston, Texas 893
## 2 Los Angeles, California Houston, Texas 614
## 3 Chicago, Illinois Houston, Texas 440
## 4 Dallas, Texas Houston, Texas 352
## 5 Houston, Texas Houston, Texas 328
## 6 Washington, District of Columbia Houston, Texas 292
## 7 Miami, Florida Houston, Texas 287
## 8 Philadelphia, Pennsylvania Houston, Texas 284
## 9 Atlanta, Georgia Houston, Texas 280
## 10 Phoenix, Arizona Houston, Texas 230
#### Now map top 10 metro areas ####
# US map of states and populations
# tidyverse because it contains the all tidy packages, including ggplot2
library(tidyverse)
#library(ggplot2)
# "us.cities" in maps package contains a database is of us cities of population
# greater than about 40,000. Also included are state capitals of any
# population size.
# "state" database produces a map of the states of the United States
# mainland generated from US Department of the Census data
library(maps)
# scales to help with scaling in ggplot
library(scales)
# Read in 10 city data with distances made in "Get Distances"
cities_10 <- read_csv("distances_top_10.csv")
# Get states for plotting state map
us_states <- as_tibble(map_data("state"))
ggplot(data = us_states, mapping = aes(x = long, y = lat,
group = group)) +
geom_polygon(fill= "white", color = "black") +
geom_point(data = cities_10, aes( x = lon.from, y = lat.from,
size = from_population, color = "purple", alpha = 0.5),
inherit.aes = FALSE) +
#geom_text(data = cities_10, aes(x = lon.from, y = lat.from, label = from), inherit.aes = FALSE) +
geom_text(data = cities_10, aes(x = lon.from, y = lat.from, label = from.short), size = 3, inherit.aes = FALSE) +
guides(alpha = FALSE) +
guides(color = FALSE) +
scale_size("Population", labels = comma) +
ggthemes::theme_map() +
theme(legend.position="bottom") +
labs(title = "10 Large Metro Areas in the U.S.")+
coord_map("bonne", parameters=45)
For our optimization, well, we need something to be optimized. We are going to optimize shipping costs. Specifically, we will minimize costs.
We figured that shipping costs might come in two sub-costs. 1) labor to load and unload and 2) cost by mile to ship. Looking through some shipping websites, it is quickly discovered that costs decrease for longer shipping distances. After some trial and error, we came up with the following heuristic shipping cost function:
Shipping Cost (in dollars) = 100 (flat rate to load and unload) + 100 *sqrt(distance)
\[
Cost(Dollars) = 100 + 100\sqrt{miles}
\] It looks like this:
# Matrix of 0's to fill with data "0 to 4000 miles"
m <- matrix(0, ncol = 2, nrow = 4000)
# Turn into a df
dist <- data.frame(m)
# Label columns
x <- c("Distance", "Cost")
colnames(dist) <- x
# Make a column of data form 1 to 4000
dist$Distance <- c(seq(1, 4000))
# Make a column using this "cost" function
dist$Cost <- sapply(dist$Distance, function(x) 100 + 15*sqrt(x))
#head(dist)
# Plot
plot(dist$Distance,dist$Cost, xlab = "Distance (miles)", ylab = "Cost ($)",
title("Shipping Cost Function"))
Now we are getting close to modeling something. For our first optimization, we will optimize the number of vehicles to ship from each of the 10 cities to the 1 hub, Houston. This, of course, is a trivial problem. But let’s work it anyway.
The optimization problem looks like this: \[min\sum_{i=1}^{10} C_iX_i\] \[st: X_i \ge S_i~~~\forall i,~i=1~to~10\] \[X_i \ge 0~~~\forall i,~i=1~to~10\]
\[X \in Integers\] ~where \(C\) is the cost from each city to Houston, where \(X\) is the calculated number of vehicles to move from each city to Houston, and where \(S\) is the number of vehicles shipped from each city to Houston.
We have identified the shipping cost function, but we didn’t actually calculate shipping costs yet.
We need the distances between each city and a potential hub. For this trivial problem, we could probably just go to Google Maps or some similar website and find 9 distances from each city to Houston (distance to Houston from Houston = 0. :) But the cost isn’t $0 in our model. There is a cost for loading and unloading at least.
For this problem, we will consider the shipping distance from in and around Houston to our hub in Houstan as 0 even though it wouldn’t actually be that. But the minimum $100 should cover that.
Getting the distance from Google Maps for driving from New York to Houston and entering that data took about 20 seconds. Doing that 10 times isn’t a big deal. But we need the distances between 50 cities. That’s 49 + 48 + 47 + 46… well, you get the idea. Fortunately, there is an easier way: Google API https://cloud.google.com/maps-platform/There are a number of others, such as Microsoft. We used Google. Our database actually needed the distance between each city and every other, including itself, both ways. We could have calculated one way and manipulated the database and also entered 0’s for each distance between a city and itself, but a simple loop and a connection to Google API data made this easy. 2,500 data calls took about 2 minutes. By “hand” would have taken 6-7 hours possibly.
Now we have the distance between each city and Houston (and every other data pair) and the cost function. We easily calculate the cost between each city.
MS Excel has a limited Solver included with the regular version of Excel. Next is a screenshot of this problem solved using the Excel optimization function.
Excel has solved this trivial problem at a cost of $2,318,286.98. You can see cost to ship between the From and To cities as “Unit Cost”. Supply/Demand has the number of vehicles that need to be shipped. “Ship” has the solved solution for this trivial problem.
Below are the raw results from this problem solved via MILP optimization in R.
The first box shows that the solver found an optimal solution and what it is.
The second box are more formal results.
The third box shows the number of cars shipped between each city.
library(tidyverse)
library(ompr)
library(magrittr)
library(ROI)
library(ROI.plugin.glpk)
library(ompr)
library(ompr.roi)
# Shipping Cost
cost <- c(705.04,690.03,593.44,332.01,100.00,662.94,617.15,689.96,522.36,614.42)
# Supply to move from each cities
supply <- c(893,614,440,352,328,292,287,284,280,230)
model <- MIPModel() %>%
# Number of cars shiped from Xi to Xj
add_variable(x[i], i = 1:10, type = "integer", lb = 0) %>%
# minimize shipping cost
set_objective(sum_expr(cost[i] * x[i], i = 1:10), "min") %>%
# must use supply from each city
add_constraint(x[i] >= supply[i], i = 1:10) #%>%
# use only one Y
#add_constraint(sum_expr(y[j], j = 1:2) == 1) %>%
# add linking variables
#result <- ROI_solve(model, solver = "glpk")
# result <- solve_model(model, with_ROI(solver = "glpk", verbose = TRUE))
# result
# get_solution(result, x[i])
result <- solve_model(model, with_ROI(solver = "glpk", verbose = TRUE))
## <SOLVER MSG> ----
## GLPK Simplex Optimizer, v4.65
## 10 rows, 10 columns, 10 non-zeros
## 0: obj = 0.000000000e+00 inf = 4.000e+03 (10)
## 10: obj = 2.318286830e+06 inf = 0.000e+00 (0)
## OPTIMAL LP SOLUTION FOUND
## GLPK Integer Optimizer, v4.65
## 10 rows, 10 columns, 10 non-zeros
## 10 integer variables, none of which are binary
## Integer optimization begins...
## Long-step dual simplex will be used
## + 10: mip = not found yet >= -inf (1; 0)
## + 10: >>>>> 2.318286830e+06 >= 2.318286830e+06 0.0% (1; 0)
## + 10: mip = 2.318286830e+06 >= tree is empty 0.0% (0; 1)
## INTEGER OPTIMAL SOLUTION FOUND
## <!SOLVER MSG> ----
result
## Status: optimal
## Objective value: 2318287
#get_solution(result, x[i])
cities_10 <- read_csv("distances_top_10.csv")
solution <- as_tibble(get_solution(result, x[i]))
solution$FROM_city <- c(cities_10$from[1:10])
solution$TO_city <- "Houston, Texas"
names(solution)[3] <- "# Cars Shipped"
solution
## # A tibble: 10 x 5
## variable i `# Cars Shipped` FROM_city TO_city
## <chr> <int> <dbl> <chr> <chr>
## 1 x 1 893 New York, New York Houston, Texas
## 2 x 2 614 New York, New York Houston, Texas
## 3 x 3 440 New York, New York Houston, Texas
## 4 x 4 352 New York, New York Houston, Texas
## 5 x 5 328 New York, New York Houston, Texas
## 6 x 6 292 New York, New York Houston, Texas
## 7 x 7 287 New York, New York Houston, Texas
## 8 x 8 284 New York, New York Houston, Texas
## 9 x 9 280 New York, New York Houston, Texas
## 10 x 10 230 New York, New York Houston, Texas
library(ggmap)
ggplot(data = us_states, mapping = aes(x = long, y = lat,
group = group)) +
geom_polygon(fill= "white", color = "black") +
geom_point(data = cities_10, aes( x = lon.from, y = lat.from,
size = from_population, color = "purple", alpha = 0.5),
inherit.aes = FALSE) +
geom_text(data = cities_10, aes(x = lon.from, y = lat.from, label = from.short), size = 3, inherit.aes = FALSE) +
theme(legend.position="bottom") +
geom_segment(data = cities_10, aes(x = lon.from, y = lat.from, xend = lon.to[5],
yend = lat.to[5]), color = "blue", size = 0.3,
arrow = arrow(), inherit.aes = FALSE) +
guides(alpha = FALSE) +
guides(color = FALSE) +
scale_size("Population", labels = comma) +
#scale_color_gradient2(midpoint=6500000,
# low="blue", high="red" ) +
#coord_map("bonne", parameters=45) +
ggthemes::theme_map() +
labs(title = "Visual of Solution Showing Arrows Indicating Shipping") +
coord_map("bonne", parameters=45)
Here is our first true optimization problem. We have 10 metro areas, the cost to ship between each and Houston or Washington and the number of cars to be shipped. Here we will let the solver chose where each city will ship that give us the least expensive solution. The optimization write-up looks like this: \[min\sum_{i=1}^{10} \sum_{j=1}^{2}C_{i,j}X_{i,j}\] \[st: \sum_{j=1}^{2}X_{i,j} \ge S_i~~~\forall i=1~to~10\] \[X_{i,j} \ge 0~~~\forall i,~i=1~to~10,~~~\forall j,~j=1~to~2\]
\[X \in Integers\] ~where \(C\) is the cost from each city to each hub, where \(X\) is the calculated number of vehicles to move from each city to each hub, and where \(S\) is the number of vehicles shipped from each city to Houston and Washington.
Excel has solved this problem at a cost of $1,634,915.13. You can see cost to ship between the From and To cities as “Unit Cost”. Supply/Demand has the number of vehicles that need to be shipped. “Ship” has the solved solution. You can see now that there is a choice between two hubs by looking in the Ship column. You can see some numbers going to Houston and some to Washington.
The first box below shows the optimal solution as cost in dollars to ship each month.
The second box shows the raw result for each city to hub pair. Right now, there are only 20 options. Row 2 has X, i=2, j=1, value = 614. This means 614 cars shipped from Los Angeles metro area (i=2) to Houston (j=1). This will get overwhelmning with larger problems so this format won’t be shown again.
The third box is the shipping solution cleaned up a bit.
library(tidyverse)
library(scales)
library(ompr)
library(magrittr)
library(ROI)
library(ROI.plugin.glpk)
library(ompr)
library(ompr.roi)
# Shipping Cost
cost <- c(705.04,690.03,593.44,332.01,100.00,662.94,617.15,689.96,522.36,614.42,
326.04,874.85,496.92,646.68,662.94,100.00,586.57,277.47,478.96,819.49)
cost_m <- matrix(cost, nrow = 10, byrow = FALSE)
#cost_m
# Supply to move from each cities
supply <- c(893,614,440,352,328,292,287,284,280,230)
model <- MIPModel() %>%
# Number of cars shiped from Xi to Xj
add_variable(x[i,j], i = 1:10, j = 1:2, type = "integer", lb = 0) %>%
# minimize shipping cost
set_objective(sum_expr(cost_m[i,j] * x[i,j], i = 1:10, j = 1:2), "min") %>%
# must use supply from each city
### fix this with J's, not 1 and 2
#add_constraint(x[i, 1] + x[i, 2] >= supply[i], i = 1:10) #%>%
# FIXED! works with j's
add_constraint(sum_expr(x[i, j], j = 1:2) >= supply[i], i = 1:10) #%>%
# use only one Y
#add_constraint(sum_expr(y[j], j = 1:2) == 1) %>%
# add linking variables
#result <- ROI_solve(model, solver = "glpk")
result <- solve_model(model, with_ROI(solver = "glpk", verbose = FALSE))
result
## Status: optimal
## Objective value: 1634917
get_solution(result, x[i,j])
## variable i j value
## 1 x 1 1 0
## 2 x 2 1 614
## 3 x 3 1 0
## 4 x 4 1 352
## 5 x 5 1 328
## 6 x 6 1 0
## 7 x 7 1 0
## 8 x 8 1 0
## 9 x 9 1 0
## 10 x 10 1 230
## 11 x 1 2 893
## 12 x 2 2 0
## 13 x 3 2 440
## 14 x 4 2 0
## 15 x 5 2 0
## 16 x 6 2 292
## 17 x 7 2 287
## 18 x 8 2 284
## 19 x 9 2 280
## 20 x 10 2 0
temp_df <- as_tibble(get_solution(result, x[i,j]))
#temp_df
### This works!!
#result <- ROI_solve(model, solver = "glpk")
#result <- solve_model(model, with_ROI(solver = "glpk", verbose = TRUE))
#result
#get_solution(result, x[i])
cities_10 <- read_csv("distances_top_10.csv")
solution <- as_tibble(get_solution(result, x[i,j]))
library(dplyr)
#solution
# Adds after the second column
solution <- solution %>%
add_column(FROM_city = 0) %>%
add_column(TO_city = 0) %>%
add_column(lon.to = 0) %>%
add_column(lat.to = 0) %>%
add_column(lon.from = 0) %>%
add_column(lat.from = 0)
from.column <- c(cities_10$to[1:10])
to.column <- c("Houston, Texas", "Washington, District of Columbia")
m <- 1
n <- 0
for (k in 1:2){
for (l in 1:10){
solution$FROM_city[m] <- from.column[l]
solution$lon.from[m] <- cities_10$lon.to[l]
solution$lat.from[m] <- cities_10$lat.to[l]
solution$TO_city[m] <- to.column[k]
solution$lon.to[m] <- cities_10$lon.to[5 + n]
solution$lat.to[m] <- cities_10$lat.to[5 + n]
m <- m + 1
}
n <- n + 1
}
solution <- solution %>%
filter(value > 0)
### This works!!!
# no clean it up
solution_display <- solution %>%
filter(value > 0) %>%
select(variable, i, j, value, FROM_city, TO_city)
solution_display
## # A tibble: 10 x 6
## variable i j value FROM_city TO_city
## <chr> <int> <int> <dbl> <chr> <chr>
## 1 x 2 1 614 Los Angeles, California Houston, Texas
## 2 x 4 1 352 Dallas, Texas Houston, Texas
## 3 x 5 1 328 Houston, Texas Houston, Texas
## 4 x 10 1 230 Phoenix, Arizona Houston, Texas
## 5 x 1 2 893 New York, New York Washington, District of…
## 6 x 3 2 440 Chicago, Illinois Washington, District of…
## 7 x 6 2 292 Washington, District of … Washington, District of…
## 8 x 7 2 287 Miami, Florida Washington, District of…
## 9 x 8 2 284 Philadelphia, Pennsylvan… Washington, District of…
## 10 x 9 2 280 Atlanta, Georgia Washington, District of…
#### now map it ####
library(tidyverse)
# "us.cities" in maps package contains This database is of us cities of population
# greater than about 40,000. Also included are state capitals of any
# population size.
# "state" database produces a map of the states of the United States
# mainland generated from US De- partment of the Census data
library(maps)
# Read in 10 city data with distances made in "Get Distances"
cities_10 <- read_csv("distances_top_10.csv")
# Get states for plotting state map
us_states <- as_tibble(map_data("state"))
ggplot(data = us_states, mapping = aes(x = long, y = lat,
group = group)) +
geom_polygon(fill= "white", color = "black") +
geom_point(data = cities_10, aes( x = lon.from, y = lat.from,
size = from_population, color = "purple", alpha = 0.5),
inherit.aes = FALSE) +
geom_text(data = cities_10, aes(x = lon.from, y = lat.from, label = from.short), size = 3, inherit.aes = FALSE) +
theme(legend.position="bottom") +
geom_segment(data = solution, aes(x = lon.from, y = lat.from, xend = lon.to,
yend = lat.to), color = "blue", size = 0.3,
arrow = arrow(), inherit.aes = FALSE) +
guides(alpha = FALSE) +
guides(color = FALSE) +
scale_size("Population", labels = comma) +
#scale_color_gradient2(midpoint=6500000,
# low="blue", high="red" ) +
#coord_map("bonne", parameters=45) +
ggthemes::theme_map() +
labs(title = "Visual Solution for 10 Cities and 2 Hubs")+
coord_map("bonne", parameters=45)
In case you were wondering, the distance from Miami to Washington D.C. is 1,058 miles and the distance from Miami to Houston is 1,188 miles. The projection type of this display creates an optical illusion that Miami is closer to Houston than Washington D.C.
Next we are going to still use Houston and Washington as our two hub options, but we will make the solver choose ONLY one of the two. The optimization problem looks like this: \[min\sum_{i=1}^{10} \sum_{j=1}^{2}C_{i,j}X_{i,j}\] \[st: \sum_{j=1}^{2}X_{i,j} \ge S_i~~~\forall i=1~to~10\] \[ \sum_{j=1}^{2}Y_j=1\] \[X_{i,j} \le MAX(S)*Y_j,~~~\forall i,~i=1~to~10,~~~\forall j,~j=1~to~2\] \[X_{i,j} \ge 0~~~\forall i,~i=1~to~10,~~~\forall j,~j=1~to~2\]
\[X \in Integers\] \[ Y \in 0~or~1\] ~where \(C\) is the cost from each city to each hub, where \(X\) is the calculated number of vehicles to move from each city to each hub, & where \(S\) is the number of vehicles shipped from each city to Houston.
These two constraints are added: \(\sum_{j=1}^{2}Y_j=1\) and \(X_{i,j} \le MAX(S)*Y_j\) and are very important to link the new variable \(Y\), which is a binary, to the shipping variable \(X\).
Above you can see that all of the cities shipped all of their cars to the Washington, D.C. hub. They ALL had to choose to the same hub. If they had chosen Houston, that would have meant that it would have cost more to ship there than DC. But they chose DC, which means that this solution must have been cheaper than Houston. And it was: $2,090,971 for shipping all to Washington, vs. $2,318,287 for shipping all to Houston.
The first box has the minimum cost solution.
The second box shows the Y variable solution, i.e. which hub was chosen.
The third box shows the X variable solution and number of cars shipped between which cities and which hubs. In this case, as we forced the solver to choose only one hub, Washington was chosen.
library(ompr)
library(magrittr)
library(ROI)
library(ROI.plugin.glpk)
library(ompr)
library(ompr.roi)
# Shipping Cost
cost <- c(705.04,690.03,593.44,332.01,100.00,662.94,617.15,689.96,522.36,614.42,
326.04,874.85,496.92,646.68,662.94,100.00,586.57,277.47,478.96,819.49)
cost_m <- matrix(cost, nrow = 10, byrow = FALSE)
#cost_m
# Supply to move from each cities
supply <- c(893,614,440,352,328,292,287,284,280,230)
model <- MIPModel() %>%
# Number of cars shiped from Xi to Xj
add_variable(x[i,j], i = 1:10, j = 1:2, type = "integer", lb = 0) %>%
# Choose Houston (Y1) or Washington (Y2)
add_variable(y[j], j = 1:2, type = "binary") %>%
#add_variable(y[j], j = 1:2, type = "integer", lb = 0, ub = 1)
# minimize shipping cost
set_objective(sum_expr(cost_m[i,j] * x[i,j], i = 1:10, j = 1:2), "min") %>%
# must use supply from each city
### fix this with J's, not 1 and 2
#add_constraint(x[i, 1] + x[i, 2] >= supply[i], i = 1:10) #%>%
# FIXED! works with j's
add_constraint(sum_expr(x[i, j], j = 1:2) >= supply[i], i = 1:10) %>%
# use only one Y
add_constraint(sum_expr(y[j], j = 1:2) == 1) %>%
# add linking variables
add_constraint(x[i,j] <= 1000*y[j], i = 1:10, j = 1:2)
# add_constraint(x[i,1] <= 1000*y[1], i = 1:10) %>%
# add_constraint(x[i,2] <= 1000*y[2], i = 1:10)
#result <- ROI_solve(model, solver = "glpk")
result <- solve_model(model, with_ROI(solver = "glpk", verbose = FALSE))
result
## Status: optimal
## Objective value: 2090971
#get_solution(result, x[i,j])
#get_solution(result, y[j])
cities_10 <- read_csv("distances_top_10.csv")
solution <- as_tibble(get_solution(result, x[i,j]))
#solution
library(dplyr)
# get hub solution
solution_hub <- as_tibble(get_solution(result, y[j]))
to.column <- c("Houston, Texas", "Washington, District of Columbia")
solution_hub <- solution_hub %>%
add_column(Hub = 0)
solution_hub$Hub <- to.column
solution_hub
## # A tibble: 2 x 4
## variable j value Hub
## <chr> <int> <dbl> <chr>
## 1 y 1 0 Houston, Texas
## 2 y 2 1 Washington, District of Columbia
# Adds after the second column
solution <- solution %>%
add_column(FROM_city = 0) %>%
add_column(TO_city = 0) %>%
add_column(lon.to = 0) %>%
add_column(lat.to = 0) %>%
add_column(lon.from = 0) %>%
add_column(lat.from = 0)
from.column <- c(cities_10$to[1:10])
to.column <- c("Houston, Texas", "Washington, District of Columbia")
m <- 1
n <- 0
for (k in 1:2){
for (l in 1:10){
solution$FROM_city[m] <- from.column[l]
solution$lon.from[m] <- cities_10$lon.to[l]
solution$lat.from[m] <- cities_10$lat.to[l]
solution$TO_city[m] <- to.column[k]
solution$lon.to[m] <- cities_10$lon.to[5 + n]
solution$lat.to[m] <- cities_10$lat.to[5 + n]
m <- m + 1
}
n <- n + 1
}
solution <- solution %>%
filter(value > 0)
### This works!!!
# no clean it up
solution_display <- solution %>%
filter(value > 0) %>%
select(variable, i, j, value, FROM_city, TO_city)
solution_display
## # A tibble: 10 x 6
## variable i j value FROM_city TO_city
## <chr> <int> <int> <dbl> <chr> <chr>
## 1 x 1 2 893 New York, New York Washington, District of…
## 2 x 2 2 614 Los Angeles, California Washington, District of…
## 3 x 3 2 440 Chicago, Illinois Washington, District of…
## 4 x 4 2 352 Dallas, Texas Washington, District of…
## 5 x 5 2 328 Houston, Texas Washington, District of…
## 6 x 6 2 292 Washington, District of … Washington, District of…
## 7 x 7 2 287 Miami, Florida Washington, District of…
## 8 x 8 2 284 Philadelphia, Pennsylvan… Washington, District of…
## 9 x 9 2 280 Atlanta, Georgia Washington, District of…
## 10 x 10 2 230 Phoenix, Arizona Washington, District of…
#### now map it ####
library(tidyverse)
# "us.cities" in maps package contains This database is of us cities of population
# greater than about 40,000. Also included are state capitals of any
# population size.
# "state" database produces a map of the states of the United States
# mainland generated from US De- partment of the Census data
library(maps)
# Read in 10 city data with distances made in "Get Distances"
cities_10 <- read_csv("distances_top_10.csv")
# Get states for plotting state map
us_states <- as_tibble(map_data("state"))
ggplot(data = us_states, mapping = aes(x = long, y = lat,
group = group)) +
geom_polygon(fill= "white", color = "black") +
geom_point(data = cities_10, aes( x = lon.from, y = lat.from,
size = from_population, color = "purple", alpha = 0.5),
inherit.aes = FALSE) +
geom_text(data = cities_10, aes(x = lon.from, y = lat.from, label = from.short), size = 3, inherit.aes = FALSE) +
theme(legend.position="bottom") +
geom_segment(data = solution, aes(x = lon.from, y = lat.from, xend = lon.to,
yend = lat.to), color = "blue", size = 0.3,
arrow = arrow(), inherit.aes = FALSE) +
guides(alpha = FALSE) +
guides(color = FALSE) +
scale_size("Population", labels = comma) +
#scale_color_gradient2(midpoint=6500000,
# low="blue", high="red" ) +
#coord_map("bonne", parameters=45) +
ggthemes::theme_map() +
labs(title = "Visual Solution for 10 Cities and a Choice Between 2 Hubs: Houston and Washington") +
coord_map("bonne", parameters=45)
Now we have our basic ideas down, we can scale up. We can set up to choose the number of hubs between 1 and 10 and then solve.
The optimization problem looks like this: \[min\sum_{i=1}^{10} \sum_{j=1}^{10}C_{i,j}X_{i,j}\] \[st: \sum_{j=1}^{10}X_{i,j} \ge S_i~~~\forall i=1~to~10\] \[ \sum_{j=1}^{10}Y_j=NumHubs\] \[X_{i,j} \le MAX(S)*Y_j,~~~\forall i,~i=1~to~10,~~~\forall j,~j=1~to~10\] \[X_{i,j} \ge 0~~~\forall i,~i=1~to~10,~~~\forall j,~j=1~to~10\]
\[X \in Integers\] \[ Y \in 0~or~1\] ~where \(C\) is the cost from each city to each hub, where \(X\) is the calculated number of vehicles to move from each city to each hub, where \(Y\) is which hubs were chosen in the solution, and where \(S\) is the number of vehicles shipped from each city to a hub.
Trivial problem - 10 Cities, 2 hubs
As you can see, Excel can’t do this. We have quickly reached the limit of what the internal Solver than came with Excel can do. There are commercial add-ins to Excel that can be purchased that will allow bigger optimization problems. But we can keep going with R.
Here we will keep using the same 10 large metro areas and choose the number of hubs we want to use. The number of hubs can be from 1 to 10. We randomly chose 3.
#### Solution in R
The first box below has the “hubs” solution: New York, Los Angeles, and Dallas were chosen. The second box shows the number of cars shipped.
# Choose more than two, any two
#### import data set ####
library(readr)
library(tidyr)
library(dplyr)
# Read in .csv file and create Tibble DF.
cities_raw <- read_csv("distances_my_top_50.csv")
# Turn into a dataframe
city_data <- as_tibble(cities_raw)
get_supply <- as_tibble(cities_raw)
# Add a number for each city 1 to 50
city_data <- city_data %>%
add_column(num.from = 0, .after = 4) %>%
add_column(num.to = 0, .after = 6)
# number from and to numbers
xx <- 1
for (ii in 1:50){
for (jj in 1:50) {
city_data$num.from[xx] <- ii
city_data$num.to[xx] <- jj
xx <- xx + 1
}
}
# Choose number of cities to use
num_cities <- 10
#data <- as.data.frame(Network_Modeling)
# Get top six in each set of TO and FROM
# city_data <- city_data %>%
# group_by(num.from) %>%
# slice_min(order_by = num.from, n = num_cities) %>%
# group_by(num.to) %>%
# slice_min(order_by = num.to, n = num_cities) %>%
# arrange(num.from)
city_data <- city_data %>%
group_by(num.from) %>%
slice_head(n = num_cities) %>%
group_by(num.to) %>%
slice_head(n = num_cities) %>%
group_by(num.from)
# redo number of cars from each city
# get supply
num_cars_month <- 4000
#library(dplyr)
get_supply <- get_supply %>%
slice_head(n = num_cities) %>%
#arrange(desc(Population)) %>% # sort Tibble by Population and descending
#slice_head(n = 11) %>% # Get only the first n rows.
mutate(to_pop_ratio = to_population / sum(to_population)) %>% # percent of Population
mutate(to_num_cars = round(to_pop_ratio * num_cars_month,0)) # Calc number cars moving each month
# make into a cost matrix
cost <- city_data$cost
cost_6_city <- matrix(cost, nrow = num_cities, byrow = FALSE)
library(ompr)
library(magrittr)
library(ROI)
library(ROI.plugin.glpk)
library(ompr)
library(ompr.roi)
# Shipping Cost
# cost <- c(705.04,690.03,593.44,332.01,100.00,662.94,617.15,689.96,522.36,614.42,
# 326.04,874.85,496.92,646.68,662.94,100.00,586.57,277.47,478.96,819.49)
# cost_m <- matrix(cost, nrow = 10, byrow = FALSE)
# cost_m
# Supply to move from each cities
#supply <- as.vector(city_data$`Number of cars Shipped From`[1:6])
# the above wasn't resized for 6 cities
supply <- as.vector(get_supply$to_num_cars)
# model <- MIPModel() %>%
# # Number of cars shiped from Xi to Xj
# add_variable(x[i,j], i = 1:6, j = 1:6, type = "integer", lb = 0) %>%
# # Choose Houston (Y1) or Washington (Y2)
# add_variable(y[j], j = 1:6, type = "binary") %>%
# #add_variable(y[j], j = 1:2, type = "integer", lb = 0, ub = 1)
# # minimize shipping cost
# set_objective(sum_expr(cost_6_city[i,j] * x[i,j], i = 1:6, j = 1:6), "min") %>%
# # must use supply from each city
#
# ### fix this with J's, not 1 and 2
# #add_constraint(x[i, 1] + x[i, 2] >= supply[i], i = 1:10) #%>%
# # FIXED! works with j's
# add_constraint(sum_expr(x[i, j], j = 1:6) >= supply[i], i = 1:6) %>%
# # add this to keep Houston
# add_constraint(y[5] == 1) %>%
# add_constraint(sum_expr(y[j], j = 1:6) == 2) %>%
# # add linking variables
# # 1500 because the new limit should be 1224
# add_constraint(x[i,j] <= 1500*y[j], i = 1:6, j = 1:6)
#
#
# #result <- ROI_solve(model, solver = "glpk")
# result <- solve_model(model, with_ROI(solver = "glpk", verbose = TRUE))
# result
# get_solution(result, x[i,j])
# get_solution(result, y[j])
num_hubs <- 3
model <- MIPModel() %>%
# Number of cars shiped from Xi to Xj
add_variable(x[i,j], i = 1:length(supply), j = 1:length(supply), type = "integer", lb = 0) %>%
# Choose Houston (Y1) or Washington (Y2)
add_variable(y[j], j = 1:length(supply), type = "binary") %>%
#add_variable(y[j], j = 1:2, type = "integer", lb = 0, ub = 1)
# minimize shipping cost
set_objective(sum_expr(cost_6_city[i,j] * x[i,j], i = 1:length(supply), j = 1:length(supply)), "min") %>%
# must use supply from each city
### fix this with J's, not 1 and 2
#add_constraint(x[i, 1] + x[i, 2] >= supply[i], i = 1:10) #%>%
# FIXED! works with j's
add_constraint(sum_expr(x[i, j], j = 1:length(supply)) >= supply[i], i = 1:length(supply)) %>%
# add this to keep Houston
#add_constraint(y[5] == 1) %>%
add_constraint(sum_expr(y[j], j = 1:length(supply)) == num_hubs) %>%
# add linking variables
# 1500 because the new limit should be 1224
add_constraint(x[i,j] <= max(supply)*y[j], i = 1:length(supply), j = 1:length(supply))
#result <- ROI_solve(model, solver = "glpk")
result <- solve_model(model, with_ROI(solver = "glpk", verbose = FALSE))
result
## Status: optimal
## Objective value: 1108548
#get_solution(result, x[i,j])
#get_solution(result, y[j])
#cities_10 <- read_csv("distances_top_10.csv")
solution <- as_tibble(get_solution(result, x[i,j]))
#solution
library(dplyr)
# get hub solution
solution_hub <- as_tibble(get_solution(result, y[j]))
to.column <- as.vector(get_supply$to)
solution_hub <- solution_hub %>%
add_column(Hub = 0)
solution_hub$Hub <- to.column
solution_hub
## # A tibble: 10 x 4
## variable j value Hub
## <chr> <int> <dbl> <chr>
## 1 y 1 1 New York, New York
## 2 y 2 1 Los Angeles, California
## 3 y 3 0 Chicago, Illinois
## 4 y 4 1 Dallas, Texas
## 5 y 5 0 Houston, Texas
## 6 y 6 0 Washington, District of Columbia
## 7 y 7 0 Miami, Florida
## 8 y 8 0 Philadelphia, Pennsylvania
## 9 y 9 0 Atlanta, Georgia
## 10 y 10 0 Phoenix, Arizona
# Adds after the second column
solution <- solution %>%
add_column(FROM_city = 0) %>%
add_column(TO_city = 0) %>%
add_column(lon.to = 0) %>%
add_column(lat.to = 0) %>%
add_column(lon.from = 0) %>%
add_column(lat.from = 0)
#m <- 1
for ( k in 1:length(city_data$lon.to)){
solution$FROM_city[k] <- city_data$from[k]
solution$lon.from[k] <- city_data$lon.from[k]
solution$lat.from[k] <- city_data$lat.from[k]
solution$TO_city[k] <- city_data$to[k]
solution$lon.to[k] <- city_data$lon.to[k]
solution$lat.to[k] <- city_data$lat.to[k]
#m < m + 1
}
#solution
# from.column <- c(cities_10$to[1:10])
# to.column <- c("Houston, Texas", "Washington, District of Columbia")
# m <- 1
# n <- 0
# for (k in 1:2){
# for (l in 1:10){
# solution$FROM_city[m] <- from.column[l]
# solution$lon.from[m] <- cities_10$lon.to[l]
# solution$lat.from[m] <- cities_10$lat.to[l]
# solution$TO_city[m] <- to.column[k]
# solution$lon.to[m] <- cities_10$lon.to[5 + n]
# solution$lat.to[m] <- cities_10$lat.to[5 + n]
# m <- m + 1
# }
# n <- n + 1
# }
### This works!!!
# no clean it up
# solution <- solution %>%
# filter(value > 0)
solution <- solution %>%
filter(value > 0)
#solution
### This works!!!
# no clean it up
solution_display <- solution %>%
filter(value > 0) %>%
select(variable, i, j, value, FROM_city, TO_city)
solution_display
## # A tibble: 10 x 6
## variable i j value FROM_city TO_city
## <chr> <int> <int> <dbl> <chr> <chr>
## 1 x 1 1 893 New York, New York New York, New York
## 2 x 3 1 440 Chicago, Illinois New York, New York
## 3 x 6 1 292 Washington, District of Colu… New York, New York
## 4 x 7 1 287 Miami, Florida New York, New York
## 5 x 8 1 284 Philadelphia, Pennsylvania New York, New York
## 6 x 2 2 614 Los Angeles, California Los Angeles, Califo…
## 7 x 10 2 230 Phoenix, Arizona Los Angeles, Califo…
## 8 x 4 4 352 Dallas, Texas Dallas, Texas
## 9 x 5 4 328 Houston, Texas Dallas, Texas
## 10 x 9 4 280 Atlanta, Georgia Dallas, Texas
#### now map it ####
library(tidyverse)
# "us.cities" in maps package contains This database is of us cities of population
# greater than about 40,000. Also included are state capitals of any
# population size.
# "state" database produces a map of the states of the United States
# mainland generated from US De- partment of the Census data
library(maps)
# Read in 10 city data with distances made in "Get Distances"
cities_10 <- read_csv("distances_top_10.csv")
# Get states for plotting state map
us_states <- as_tibble(map_data("state"))
ggplot(data = us_states, mapping = aes(x = long, y = lat,
group = group)) +
geom_polygon(fill= "white", color = "black") +
geom_point(data = city_data, aes( x = lon.from, y = lat.from,
size = from_population, color = "purple", alpha = 0.5),
inherit.aes = FALSE) +
geom_text(data = cities_10, aes(x = lon.from, y = lat.from, label = from.short), size = 3, inherit.aes = FALSE) +
geom_segment(data = solution, aes(x = lon.from, y = lat.from, xend = lon.to,
yend = lat.to), color = "blue", size = 0.3,
arrow = arrow(), inherit.aes = FALSE) +
guides(alpha = FALSE) +
guides(color = FALSE) +
scale_size("Population", labels = comma) +
#scale_color_gradient2(midpoint=6500000,
# low="blue", high="red" ) +
#coord_map("bonne", parameters=45) +
ggthemes::theme_map() +
labs(title = "Visual Solution for 10 Cities and 3 Hubs")+
coord_map("bonne", parameters=45)
We are now using all of the 50 large metros areas we found earlier. We will model our growing car sales company. If their first hub was in Houston (and they don’t want to move it at this time), where should the next hub be located.
#### Solution in R
# Choose more than two, any two
#### import data set ####
library(scales)
library(tidyverse)
library(readr)
library(tidyr)
library(dplyr)
# Read in .csv file and create Tibble DF.
cities_raw <- read_csv("distances_my_top_50.csv")
# Turn into a dataframe
city_data <- as_tibble(cities_raw)
get_supply <- as_tibble(cities_raw)
# Add a number for each city 1 to 50
city_data <- city_data %>%
add_column(num.from = 0, .after = 4) %>%
add_column(num.to = 0, .after = 6)
# number from and to numbers
xx <- 1
for (ii in 1:50){
for (jj in 1:50) {
city_data$num.from[xx] <- ii
city_data$num.to[xx] <- jj
xx <- xx + 1
}
}
# Choose number of cities to use
num_cities <- 50
#data <- as.data.frame(Network_Modeling)
# Get top six in each set of TO and FROM
# city_data <- city_data %>%
# group_by(num.from) %>%
# slice_min(order_by = num.from, n = num_cities) %>%
# group_by(num.to) %>%
# slice_min(order_by = num.to, n = num_cities) %>%
# arrange(num.from)
city_data <- city_data %>%
group_by(num.from) %>%
slice_head(n = num_cities) %>%
group_by(num.to) %>%
slice_head(n = num_cities) %>%
group_by(num.from)
# redo number of cars from each city
# get supply
num_cars_month <- 4000
#library(dplyr)
get_supply <- get_supply %>%
slice_head(n = num_cities) %>%
#arrange(desc(Population)) %>% # sort Tibble by Population and descending
#slice_head(n = 11) %>% # Get only the first n rows.
mutate(to_pop_ratio = to_population / sum(to_population)) %>% # percent of Population
mutate(to_num_cars = round(to_pop_ratio * num_cars_month,0)) # Calc number cars moving each month
# make into a cost matrix
cost <- city_data$cost
cost_6_city <- matrix(cost, nrow = num_cities, byrow = FALSE)
library(ompr)
library(magrittr)
library(ROI)
library(ROI.plugin.glpk)
library(ompr)
library(ompr.roi)
# Shipping Cost
# cost <- c(705.04,690.03,593.44,332.01,100.00,662.94,617.15,689.96,522.36,614.42,
# 326.04,874.85,496.92,646.68,662.94,100.00,586.57,277.47,478.96,819.49)
# cost_m <- matrix(cost, nrow = 10, byrow = FALSE)
# cost_m
# Supply to move from each cities
#supply <- as.vector(city_data$`Number of cars Shipped From`[1:6])
# the above wasn't resized for 6 cities
supply <- as.vector(get_supply$to_num_cars)
# model <- MIPModel() %>%
# # Number of cars shiped from Xi to Xj
# add_variable(x[i,j], i = 1:6, j = 1:6, type = "integer", lb = 0) %>%
# # Choose Houston (Y1) or Washington (Y2)
# add_variable(y[j], j = 1:6, type = "binary") %>%
# #add_variable(y[j], j = 1:2, type = "integer", lb = 0, ub = 1)
# # minimize shipping cost
# set_objective(sum_expr(cost_6_city[i,j] * x[i,j], i = 1:6, j = 1:6), "min") %>%
# # must use supply from each city
#
# ### fix this with J's, not 1 and 2
# #add_constraint(x[i, 1] + x[i, 2] >= supply[i], i = 1:10) #%>%
# # FIXED! works with j's
# add_constraint(sum_expr(x[i, j], j = 1:6) >= supply[i], i = 1:6) %>%
# # add this to keep Houston
# add_constraint(y[5] == 1) %>%
# add_constraint(sum_expr(y[j], j = 1:6) == 2) %>%
# # add linking variables
# # 1500 because the new limit should be 1224
# add_constraint(x[i,j] <= 1500*y[j], i = 1:6, j = 1:6)
#
#
# #result <- ROI_solve(model, solver = "glpk")
# result <- solve_model(model, with_ROI(solver = "glpk", verbose = TRUE))
# result
# get_solution(result, x[i,j])
# get_solution(result, y[j])
num_hubs <- 2
model <- MIPModel() %>%
# Number of cars shiped from Xi to Xj
add_variable(x[i,j], i = 1:length(supply), j = 1:length(supply), type = "integer", lb = 0) %>%
# Choose Houston (Y1) or Washington (Y2)
add_variable(y[j], j = 1:length(supply), type = "binary") %>%
#add_variable(y[j], j = 1:2, type = "integer", lb = 0, ub = 1)
# minimize shipping cost
set_objective(sum_expr(cost_6_city[i,j] * x[i,j], i = 1:length(supply), j = 1:length(supply)), "min") %>%
# must use supply from each city
### fix this with J's, not 1 and 2
#add_constraint(x[i, 1] + x[i, 2] >= supply[i], i = 1:10) #%>%
# FIXED! works with j's
add_constraint(sum_expr(x[i, j], j = 1:length(supply)) >= supply[i], i = 1:length(supply)) %>%
# add this to keep Houston
add_constraint(y[5] == 1) %>%
add_constraint(sum_expr(y[j], j = 1:length(supply)) == num_hubs) %>%
# add linking variables
# 1500 because the new limit should be 1224
add_constraint(x[i,j] <= max(supply)*y[j], i = 1:length(supply), j = 1:length(supply))
#result <- ROI_solve(model, solver = "glpk")
result <- solve_model(model, with_ROI(solver = "glpk", verbose = FALSE))
result
## Status: optimal
## Objective value: 1824953
#get_solution(result, x[i,j])
#get_solution(result, y[j])
#cities_10 <- read_csv("distances_top_10.csv")
solution <- as_tibble(get_solution(result, x[i,j]))
#solution
library(dplyr)
# get hub solution
solution_hub <- as_tibble(get_solution(result, y[j]))
to.column <- as.vector(get_supply$to)
solution_hub <- solution_hub %>%
add_column(Hub = 0)
solution_hub$Hub <- to.column
solution_hub <- solution_hub %>%
filter(value > 0)
solution_hub
## # A tibble: 2 x 4
## variable j value Hub
## <chr> <int> <dbl> <chr>
## 1 y 1 1 New York, New York
## 2 y 5 1 Houston, Texas
# Adds after the second column
solution <- solution %>%
add_column(FROM_city = 0) %>%
add_column(TO_city = 0) %>%
add_column(lon.to = 0) %>%
add_column(lat.to = 0) %>%
add_column(lon.from = 0) %>%
add_column(lat.from = 0)
#m <- 1
for ( k in 1:length(city_data$lon.to)){
solution$FROM_city[k] <- city_data$from[k]
solution$lon.from[k] <- city_data$lon.from[k]
solution$lat.from[k] <- city_data$lat.from[k]
solution$TO_city[k] <- city_data$to[k]
solution$lon.to[k] <- city_data$lon.to[k]
solution$lat.to[k] <- city_data$lat.to[k]
#m < m + 1
}
#solution
# from.column <- c(cities_10$to[1:10])
# to.column <- c("Houston, Texas", "Washington, District of Columbia")
# m <- 1
# n <- 0
# for (k in 1:2){
# for (l in 1:10){
# solution$FROM_city[m] <- from.column[l]
# solution$lon.from[m] <- cities_10$lon.to[l]
# solution$lat.from[m] <- cities_10$lat.to[l]
# solution$TO_city[m] <- to.column[k]
# solution$lon.to[m] <- cities_10$lon.to[5 + n]
# solution$lat.to[m] <- cities_10$lat.to[5 + n]
# m <- m + 1
# }
# n <- n + 1
# }
### This works!!!
# no clean it up
solution <- solution %>%
filter(value > 0)
solution_display <- solution %>%
filter(value > 0) %>%
select(variable, i, j, value, FROM_city, TO_city)
solution_display
## # A tibble: 50 x 6
## variable i j value FROM_city TO_city
## <chr> <int> <int> <dbl> <chr> <chr>
## 1 x 1 1 450 New York, New York New York, New Yo…
## 2 x 3 1 222 Chicago, Illinois New York, New Yo…
## 3 x 6 1 147 Washington, District of Columbia New York, New Yo…
## 4 x 8 1 143 Philadelphia, Pennsylvania New York, New Yo…
## 5 x 11 1 114 Boston, Massachusetts New York, New Yo…
## 6 x 13 1 101 Detroit, Michigan New York, New Yo…
## 7 x 20 1 66 Baltimore, Maryland New York, New Yo…
## 8 x 21 1 62 Charlotte, North Carolina New York, New Yo…
## 9 x 25 1 54 Pittsburgh, Pennsylvania New York, New Yo…
## 10 x 27 1 52 Cincinnati, Ohio New York, New Yo…
## # … with 40 more rows
#### now map it ####
library(tidyverse)
# "us.cities" in maps package contains This database is of us cities of population
# greater than about 40,000. Also included are state capitals of any
# population size.
# "state" database produces a map of the states of the United States
# mainland generated from US De- partment of the Census data
library(maps)
# Read in 10 city data with distances made in "Get Distances"
cities_10 <- read_csv("distances_top_10.csv")
# Get states for plotting state map
us_states <- as_tibble(map_data("state"))
ggplot(data = us_states, mapping = aes(x = long, y = lat,
group = group)) +
geom_polygon(fill= "white", color = "black") +
geom_point(data = city_data, aes( x = lon.from, y = lat.from,
size = from_population, color = "purple", alpha = 0.5),
inherit.aes = FALSE) +
geom_text(data = cities_10, aes(x = lon.from, y = lat.from, label = from.short), size = 3, inherit.aes = FALSE) +
geom_segment(data = solution, aes(x = lon.from, y = lat.from, xend = lon.to,
yend = lat.to), color = "blue", size = 0.3,
arrow = arrow(), inherit.aes = FALSE) +
guides(alpha = FALSE) +
guides(color = FALSE) +
scale_size("Population", labels = comma) +
#scale_color_gradient2(midpoint=6500000,
# low="blue", high="red" ) +
#coord_map("bonne", parameters=45) +
ggthemes::theme_map() +
labs(title = "Visual Solution for 50 Cities and 2 Hubs (1 == Houston)")+
coord_map("bonne", parameters=45)
# Choose more than two, any two
#### import data set ####
library(readr)
library(tidyr)
library(dplyr)
# Read in .csv file and create Tibble DF.
cities_raw <- read_csv("distances_my_top_50.csv")
# Turn into a dataframe
city_data <- as_tibble(cities_raw)
get_supply <- as_tibble(cities_raw)
# Add a number for each city 1 to 50
city_data <- city_data %>%
add_column(num.from = 0, .after = 4) %>%
add_column(num.to = 0, .after = 6)
# number from and to numbers
xx <- 1
for (ii in 1:50){
for (jj in 1:50) {
city_data$num.from[xx] <- ii
city_data$num.to[xx] <- jj
xx <- xx + 1
}
}
# Choose number of cities to use
num_cities <- 50
#data <- as.data.frame(Network_Modeling)
# Get top six in each set of TO and FROM
# city_data <- city_data %>%
# group_by(num.from) %>%
# slice_min(order_by = num.from, n = num_cities) %>%
# group_by(num.to) %>%
# slice_min(order_by = num.to, n = num_cities) %>%
# arrange(num.from)
city_data <- city_data %>%
group_by(num.from) %>%
slice_head(n = num_cities) %>%
group_by(num.to) %>%
slice_head(n = num_cities) %>%
group_by(num.from)
# redo number of cars from each city
# get supply
num_cars_month <- 4000
#library(dplyr)
get_supply <- get_supply %>%
slice_head(n = num_cities) %>%
#arrange(desc(Population)) %>% # sort Tibble by Population and descending
#slice_head(n = 11) %>% # Get only the first n rows.
mutate(to_pop_ratio = to_population / sum(to_population)) %>% # percent of Population
mutate(to_num_cars = round(to_pop_ratio * num_cars_month,0)) # Calc number cars moving each month
# make into a cost matrix
cost <- city_data$cost
cost_6_city <- matrix(cost, nrow = num_cities, byrow = FALSE)
library(ompr)
library(magrittr)
library(ROI)
library(ROI.plugin.glpk)
library(ompr)
library(ompr.roi)
# Shipping Cost
# cost <- c(705.04,690.03,593.44,332.01,100.00,662.94,617.15,689.96,522.36,614.42,
# 326.04,874.85,496.92,646.68,662.94,100.00,586.57,277.47,478.96,819.49)
# cost_m <- matrix(cost, nrow = 10, byrow = FALSE)
# cost_m
# Supply to move from each cities
#supply <- as.vector(city_data$`Number of cars Shipped From`[1:6])
# the above wasn't resized for 6 cities
supply <- as.vector(get_supply$to_num_cars)
# model <- MIPModel() %>%
# # Number of cars shiped from Xi to Xj
# add_variable(x[i,j], i = 1:6, j = 1:6, type = "integer", lb = 0) %>%
# # Choose Houston (Y1) or Washington (Y2)
# add_variable(y[j], j = 1:6, type = "binary") %>%
# #add_variable(y[j], j = 1:2, type = "integer", lb = 0, ub = 1)
# # minimize shipping cost
# set_objective(sum_expr(cost_6_city[i,j] * x[i,j], i = 1:6, j = 1:6), "min") %>%
# # must use supply from each city
#
# ### fix this with J's, not 1 and 2
# #add_constraint(x[i, 1] + x[i, 2] >= supply[i], i = 1:10) #%>%
# # FIXED! works with j's
# add_constraint(sum_expr(x[i, j], j = 1:6) >= supply[i], i = 1:6) %>%
# # add this to keep Houston
# add_constraint(y[5] == 1) %>%
# add_constraint(sum_expr(y[j], j = 1:6) == 2) %>%
# # add linking variables
# # 1500 because the new limit should be 1224
# add_constraint(x[i,j] <= 1500*y[j], i = 1:6, j = 1:6)
#
#
# #result <- ROI_solve(model, solver = "glpk")
# result <- solve_model(model, with_ROI(solver = "glpk", verbose = TRUE))
# result
# get_solution(result, x[i,j])
# get_solution(result, y[j])
num_hubs <- 3
model <- MIPModel() %>%
# Number of cars shiped from Xi to Xj
add_variable(x[i,j], i = 1:length(supply), j = 1:length(supply), type = "integer", lb = 0) %>%
# Choose Houston (Y1) or Washington (Y2)
add_variable(y[j], j = 1:length(supply), type = "binary") %>%
#add_variable(y[j], j = 1:2, type = "integer", lb = 0, ub = 1)
# minimize shipping cost
set_objective(sum_expr(cost_6_city[i,j] * x[i,j], i = 1:length(supply), j = 1:length(supply)), "min") %>%
# must use supply from each city
### fix this with J's, not 1 and 2
#add_constraint(x[i, 1] + x[i, 2] >= supply[i], i = 1:10) #%>%
# FIXED! works with j's
add_constraint(sum_expr(x[i, j], j = 1:length(supply)) >= supply[i], i = 1:length(supply)) %>%
# add this to keep Houston
#add_constraint(y[5] == 1) %>%
add_constraint(sum_expr(y[j], j = 1:length(supply)) == num_hubs) %>%
# add linking variables
# 1500 because the new limit should be 1224
add_constraint(x[i,j] <= max(supply)*y[j], i = 1:length(supply), j = 1:length(supply))
#result <- ROI_solve(model, solver = "glpk")
result <- solve_model(model, with_ROI(solver = "glpk", verbose = FALSE))
result
## Status: optimal
## Objective value: 1442136
#get_solution(result, x[i,j])
#get_solution(result, y[j])
#cities_10 <- read_csv("distances_top_10.csv")
solution <- as_tibble(get_solution(result, x[i,j]))
#solution
library(dplyr)
# get hub solution
solution_hub <- as_tibble(get_solution(result, y[j]))
to.column <- as.vector(get_supply$to)
solution_hub <- solution_hub %>%
add_column(Hub = 0)
solution_hub$Hub <- to.column
solution_hub <- solution_hub %>%
filter(value > 0)
solution_hub
## # A tibble: 3 x 4
## variable j value Hub
## <chr> <int> <dbl> <chr>
## 1 y 1 1 New York, New York
## 2 y 2 1 Los Angeles, California
## 3 y 32 1 Nashville, Tennessee
# Adds after the second column
solution <- solution %>%
add_column(FROM_city = 0) %>%
add_column(TO_city = 0) %>%
add_column(lon.to = 0) %>%
add_column(lat.to = 0) %>%
add_column(lon.from = 0) %>%
add_column(lat.from = 0)
#m <- 1
for ( k in 1:length(city_data$lon.to)){
solution$FROM_city[k] <- city_data$from[k]
solution$lon.from[k] <- city_data$lon.from[k]
solution$lat.from[k] <- city_data$lat.from[k]
solution$TO_city[k] <- city_data$to[k]
solution$lon.to[k] <- city_data$lon.to[k]
solution$lat.to[k] <- city_data$lat.to[k]
#m < m + 1
}
#solution
# from.column <- c(cities_10$to[1:10])
# to.column <- c("Houston, Texas", "Washington, District of Columbia")
# m <- 1
# n <- 0
# for (k in 1:2){
# for (l in 1:10){
# solution$FROM_city[m] <- from.column[l]
# solution$lon.from[m] <- cities_10$lon.to[l]
# solution$lat.from[m] <- cities_10$lat.to[l]
# solution$TO_city[m] <- to.column[k]
# solution$lon.to[m] <- cities_10$lon.to[5 + n]
# solution$lat.to[m] <- cities_10$lat.to[5 + n]
# m <- m + 1
# }
# n <- n + 1
# }
### This works!!!
# no clean it up
solution <- solution %>%
filter(value > 0)
#solution
### This works!!!
# no clean it up
solution_display <- solution %>%
filter(value > 0) %>%
select(variable, i, j, value, FROM_city, TO_city)
solution_display
## # A tibble: 50 x 6
## variable i j value FROM_city TO_city
## <chr> <int> <int> <dbl> <chr> <chr>
## 1 x 1 1 450 New York, New York New York, New Yo…
## 2 x 6 1 147 Washington, District of Columbia New York, New Yo…
## 3 x 8 1 143 Philadelphia, Pennsylvania New York, New Yo…
## 4 x 11 1 114 Boston, Massachusetts New York, New Yo…
## 5 x 20 1 66 Baltimore, Maryland New York, New Yo…
## 6 x 25 1 54 Pittsburgh, Pennsylvania New York, New Yo…
## 7 x 31 1 48 Cleveland, Ohio New York, New Yo…
## 8 x 33 1 41 Norfolk, Virginia New York, New Yo…
## 9 x 37 1 33 Raleigh-Durham, North Carolina New York, New Yo…
## 10 x 39 1 30 Richmond, Virginia New York, New Yo…
## # … with 40 more rows
#### now map it ####
library(tidyverse)
# "us.cities" in maps package contains This database is of us cities of population
# greater than about 40,000. Also included are state capitals of any
# population size.
# "state" database produces a map of the states of the United States
# mainland generated from US De- partment of the Census data
library(maps)
# Read in 10 city data with distances made in "Get Distances"
cities_10 <- read_csv("distances_top_10.csv")
# Get states for plotting state map
us_states <- as_tibble(map_data("state"))
ggplot(data = us_states, mapping = aes(x = long, y = lat,
group = group)) +
geom_polygon(fill= "white", color = "black") +
geom_point(data = city_data, aes( x = lon.from, y = lat.from,
size = from_population, color = "purple", alpha = 0.5),
inherit.aes = FALSE) +
geom_text(data = city_data, aes(x = lon.from, y = lat.from, label = from.short), size = 3, inherit.aes = FALSE) +
geom_segment(data = solution, aes(x = lon.from, y = lat.from, xend = lon.to,
yend = lat.to), color = "blue", size = 0.3,
arrow = arrow(), inherit.aes = FALSE) +
guides(alpha = FALSE) +
guides(color = FALSE) +
scale_size("Population", labels = comma) +
#scale_color_gradient2(midpoint=6500000,
# low="blue", high="red" ) +
#coord_map("bonne", parameters=45) +
ggthemes::theme_map() +
labs(title = "Visual Solution for 50 Cities and 3 Hubs")+
coord_map("bonne", parameters=45)
Some possible questions we’d like to answer:
What if we change cost to ship, will that change which hubs are chosen?
What if we add a cost function for the actual hub? How will that affect the outcome?
Two additional shipping cost functions were tried as shown below:
\[Cost(Dollars) = 150 + 150\sqrt{miles}\]
\[Cost(Dollars) = 50 + 50\sqrt{miles}\]
There will be referred to as: “High” and “Low”. And the original “Med” shown below: \[Cost(Dollars) = 100 + 100\sqrt{miles}\]
For a hub rental function, the following might be needed:
Mechanical inspection: each bay could do 2 cars a day (round up) (at least 20’x30’)
Clean and prep: each bay could do 2 cars a day (round up) (at least 15’x30’)
Heavy mechanical repair[only 10% of cars]: each bay could do 0.5 cars a day (round up) (at least 20’x30’)
Photo booth: each bay could do 4 cars a day (round up) (at least 20’x30’)
Foreman offices: 30’ by 30’ 20 workdays a month
A similar method was used to for the laborers. And a rate adjustment was used which is another heuristic. For the rate \(R\) used, level adjustments are: High - \(1.25*R\), Medium - \(1*R\), and Low - \(0.5*R\).
450 calculations were planned. For a dataset with 50 metro areas, an optimization would be done for each solution of the number of hubs from 1 to 50. Then, for each solution, the 3 different shipping costs. And, for those, the 3 different Rent/Labor rates.
That’s 50 * 3 * 3 = 450
As it turns out, the 2015 Macbook used wasn’t fully up to the task. Significant more time was needed to solve problems with 50 metro areas and up to 50 hubs. 1 hub solved in about 1 minute. 2 hubs needed about 10 minutes. 3 hubs - 35 minutes. 4 hubs - well over an hour. For 5 hubs, the solver appeared to be about 40% complete at 3 hours and the calculation was stopped. It just wasn’t reasonable to keep going.
Fortunately, 25 metros and up to 25 hubs was significantly quicker. The total time for these smaller optimizations was just over two hours.
A plot of the outcomes is below.
final2 <- read_csv("25_city_results_to_plot.csv")
library(scales)
ggplot(data = final2, mapping = aes(x = num.hubs, y = total.cost)) +
geom_point(data = final2, aes(x = num.hubs, y = total.cost, shape=miles.func, color = rent.func)) +
labs(title = "Total Cost (per month)",
subtitle = "Shipping and Preparing Cars for Speedy Car Sales, Inc.",
x = "Number of Hubs", y = "Total Cost ($)", shape = "Shipping Cost",
color = "Rent & Labor Cost") +
theme(legend.position="bottom") +
scale_y_continuous(label=dollar_format())
The shape of the “Rent & Labor Cost” curves for each H, M, & L level are all very similar. In other words, for the three different Shipping Costs for Low Rent/Labor Cost, the curves are very similar. This indicates Shipping Costs (at least how estimated in this model) have much less impact on the optimal number of hubs than Rent & Labor Costs do. Excluding Rent & Labor Costs all together (not shown here) shows that Costs keep decreasing as more hubs are added. But, of course, hubs cost money to operate.
The amount of Rent & Labor Costs to operate each hub clearly matter in this model. As modeled, one can clearly observe a minimum cost near 9 hubs for H and M costs (as modeled). This is because as more and more hubs are added and fewer and fewer cars go to each hub, there is the expectation that some of the mechanics and vehicle preparers won’t be fully employed 8 hours a day. But, the company still needs to have them.
As shipping costs appear to matter less than rent and labor costs, it might be beneficial to next look at finding locations where the rent and labor rates are lowest. Would an area in middle Pennsylvania be a better location due to reduced hub expenses than placing a hub in New York, Philadelphia, Baltimore, or Washington D.C. even though there might be a slight increase in shipping costs. Probably so.
Cost of the operation of the hubs is more of a factor in changing costs than shipping. [As modeled.]